package com.demo.java.crawl.processor;

import com.demo.java.crawl.utils.PageUtils;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for CSDN blog article pages
 * (for example the article URL used by {@link #main(String[])}).
 *
 * <p>{@link #process(Page)} evaluates one XPath expression per attribute through
 * {@link PageUtils} and stores each result on the page under a fixed field name:
 * {@code title}, {@code content}, {@code author}, {@code category}, {@code tags},
 * {@code createTime}, {@code views}, {@code collects}, {@code thumb} and {@code talk}.
 */
@Component
public class CSDNProcessor implements PageProcessor {
    /** Crawl settings: retry times 3, sleep time 0. Assigned once and never replaced. */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(0);

    /**
     * Extracts the article attributes from {@code page} and records them as page fields.
     *
     * <p>The exact return values of {@code PageUtils.xpathText} / {@code PageUtils.xpathEle}
     * are defined in {@link PageUtils}; presumably text content vs. the matched element —
     * TODO confirm against that helper.
     *
     * @param page the downloaded page to extract fields from
     */
    @Override
    public void process(Page page) {
        page.putField("title", PageUtils.xpathText(page, "h1[@id='articleContentId']"));
        // "content" is the only field read via xpathEle rather than xpathText.
        page.putField("content", PageUtils.xpathEle(page, "article[@class='baidu_pl']/div[@id='article_content']"));
        page.putField("author", PageUtils.xpathText(page, "div[@class='bar-content']/a[@class='follow-nickName']"));
        // The first tag link is stored as the category.
        page.putField("category", PageUtils.xpathText(page, "div[@class='artic-tag-box']/a[@class='tag-link'][1]"));
        // NOTE(review): this selector also matches the first tag link that is already stored as
        // "category". A "[position()>1]" predicate was left as a hint here by the original
        // author — confirm whether tags should exclude the category and whether the XPath
        // engine behind PageUtils supports that predicate before changing it.
        page.putField("tags", PageUtils.xpathText(page, "div[@class='artic-tag-box']/a[@class='tag-link']"));
        page.putField("createTime", PageUtils.xpathText(page, "span[@class='time']"));
        page.putField("views", PageUtils.xpathText(page, "span[@class='read-count']"));
        page.putField("collects", PageUtils.xpathText(page, "span[@class='get-collection']"));
        // Presumably the like count and the comment count respectively — verify against the page markup.
        page.putField("thumb", PageUtils.xpathText(page, "span[@id='is-like-span']"));
        page.putField("talk", PageUtils.xpathText(page, "span[@id='spanCount']"));
        // Sample image URL kept for reference (presumably the form of image links inside article content):
        // https://imgconvert.csdnimg.cn/aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwOTAzMTEyMDIxOTM3?x-oss-process=image/format,png
    }

    /**
     * Returns the crawl settings used for this processor.
     *
     * @return the {@link Site} instance configured when this processor was created
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Manual entry point: creates a spider around a new {@code CSDNProcessor}, seeds it with a
     * single CSDN article URL and runs it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Spider.create(new CSDNProcessor()).addUrl("https://blog.csdn.net/boling_cavalry/article/details/77793224").run();
    }
}
